Code
# Read the chapter PDF as text with pdftools::pdf_text().
# NOTE(review): the assignment target was displaced to the end of this line,
# leaving invalid R; it is restored here as `raw`. Confirm the intended name:
# the pipeline that follows reads from `manuscript`.
raw <- pdftools::pdf_text("homo_deus_chapter_1.pdf")
Luke Heley
May 30, 2023
# Split the extracted manuscript text into titled sections.
#
# Each element of `manuscript` is broken on triple newlines. Chunks that still
# contain a blank line ("\n\n") are discarded, the first two remaining chunks
# are dropped, and the first line of every surviving chunk becomes its
# `section` value while the remainder becomes `text`. A sequential
# `section_id` is added last.
section_text <- dplyr::tibble(manuscript) |>
  dplyr::mutate(section = stringr::str_split(manuscript, "\n\n\n")) |>
  tidyr::unnest(cols = "section") |>
  dplyr::select(-manuscript) |>
  dplyr::filter(!stringr::str_detect(section, "\n\n")) |>
  # Drop the first two chunks. Filtering on row_number() rather than
  # slice(3:n()) avoids the descending sequence that 3:n() produces when
  # fewer than three chunks remain, which would keep rows meant to be dropped.
  dplyr::filter(dplyr::row_number() > 2L) |>
  dplyr::mutate(section = stringr::str_trim(section)) |>
  tidyr::separate(section, c("section", "text"), "\n", extra = "merge") |>
  # row_number() is empty for an empty table, whereas 1:n() would be c(1, 0)
  # and make mutate() fail on a size mismatch.
  dplyr::mutate(section_id = dplyr::row_number())